Intro

This is just a work book from the data analysis that I did with a team at the 2nd Annual CUA data science hackathon. This is mostly meant to show some of my methods when dealing with 11 years of voter data and piecing together a story. Feel free to look over it, this file will soon be refined, as this is the unrefined work of a 5 hour coding sprint. I recomend looking at the end for the draft of the crime explorer app. With some more time invested into it could become a great tool.

Lets go

Historical data from 1992 to 2018 May need to look at history of all of these elections to get context. We have a couple tasks, find some reasoning behind all these datasets, figure out how to join them together. Or atleast we can join them, into a mega database and then nest them together by year or something. Building a solid database will be the only way to win this.

Starting from 08 the data seems to get tidy. 2010 is when they adopt a tidy data mindset with standardized data.

Importing

library(tidyverse)
## -- Attaching packages ---------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.1       v purrr   0.3.2  
## v tibble  2.1.1       v dplyr   0.8.0.1
## v tidyr   0.8.3       v stringr 1.4.0  
## v readr   1.3.1       v forcats 0.4.0
## -- Conflicts ------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readr)
imports <- function(){
April_26_2011_Special_Election_Certified_Results <- read_csv("DataElection/DataElection/2011/April_26_2011_Special_Election_Certified_Results.csv")
April_3_2012_Primary_Election_Certified_Results <- read_csv("DataElection/DataElection/2012/April_3_2012_Primary_Election_Certified_Results.csv")
May_15_2012_Special_Election_Certified_Results <- read_csv("DataElection/DataElection/2012/May_15_2012_Special_Election_Certified_Results.csv")
November_6_2012_General_and_Special_Election_Certified_Results <- read_csv("DataElection/DataElection/2012/November_6_2012_General_and_Special_Election_Certified_Results.csv")
April_23_2013_Special_Election_Certified_Results <- read_csv("DataElection/DataElection/2013/April_23_2013_Special_Election_Certified_Results.csv")
April_1_2014_Primary_Election_Certified_Results <- read_csv("DataElection/DataElection/2014/April_1_2014_Primary_Election_Certified_Results.csv")
July_15_2014_Special_Election_Certified_Results <- read_csv("DataElection/DataElection/2014/July_15_2014_Special_Election_Certified_Results.csv")
November_4_2014_General_Election_Certified_Results <- read_csv("DataElection/DataElection/2014/November_4_2014_General_Election_Certified_Results.csv")
April_28_2015_Special_Election_Certified_Results <- read_csv("DataElection/DataElection/2015/April_28_2015_Special_Election_Certified_Results.csv")
June_14_2016_Primary_Election_Certified_Results <- read_csv("DataElection/DataElection/2016/June_14_2016_Primary_Election_Certified_Results.csv")
November_8_2016_General_Election_Certified_Results <- read_csv("DataElection/DataElection/2016/November_8_2016_General_Election_Certified_Results.csv")
December_4_2018_Special_Election_Certified_Results <- read_csv("DataElection/DataElection/2018/December_4_2018_Special_Election_Certified_Results.csv")
June_19_2018_Primary_Election_Certified_Results <- read_csv("DataElection/DataElection/2018/June_19_2018_Primary_Election_Certified_Results.csv")
November_6_2018_General_Election_Certified_Results <- read_csv("DataElection/DataElection/2018/November_6_2018_General_Election_Certified_Results.csv")
bigdata <- list(April_26_2011_Special_Election_Certified_Results, April_3_2012_Primary_Election_Certified_Results, May_15_2012_Special_Election_Certified_Results, November_6_2012_General_and_Special_Election_Certified_Results, April_23_2013_Special_Election_Certified_Results, April_1_2014_Primary_Election_Certified_Results, July_15_2014_Special_Election_Certified_Results, November_4_2014_General_Election_Certified_Results, April_28_2015_Special_Election_Certified_Results, June_14_2016_Primary_Election_Certified_Results, November_8_2016_General_Election_Certified_Results) %>% reduce(rbind)

data2018 <- November_6_2018_General_Election_Certified_Results %>%
  full_join(June_19_2018_Primary_Election_Certified_Results) %>%
  full_join(December_4_2018_Special_Election_Certified_Results)


DF2011to2018 <- data2018 %>%
  full_join(bigdata,  by = c("ElectionDate" = "ELECTION_DATE", "ElectionName" = "ELECTION_NAME", "ContestNumber" = "CONTEST_ID", "ContestName" = "CONTEST_NAME", "PrecinctNumber" = "PRECINCT_NUMBER", "WardNumber" = "WARD", "Candidate" = "CANDIDATE", "Party" = "PARTY", "Votes" = "VOTES"))
return(DF2011to2018)
}
DF2011to2018 <- imports()
## Parsed with column specification:
## cols(
##   ELECTION_DATE = col_character(),
##   ELECTION_NAME = col_character(),
##   CONTEST_ID = col_double(),
##   CONTEST_NAME = col_character(),
##   PRECINCT_NUMBER = col_double(),
##   WARD = col_double(),
##   CANDIDATE = col_character(),
##   PARTY = col_character(),
##   VOTES = col_double()
## )
## Parsed with column specification:
## cols(
##   ELECTION_DATE = col_character(),
##   ELECTION_NAME = col_character(),
##   CONTEST_ID = col_double(),
##   CONTEST_NAME = col_character(),
##   PRECINCT_NUMBER = col_double(),
##   WARD = col_double(),
##   CANDIDATE = col_character(),
##   PARTY = col_character(),
##   VOTES = col_double()
## )
## Parsed with column specification:
## cols(
##   ELECTION_DATE = col_character(),
##   ELECTION_NAME = col_character(),
##   CONTEST_ID = col_double(),
##   CONTEST_NAME = col_character(),
##   PRECINCT_NUMBER = col_double(),
##   WARD = col_double(),
##   CANDIDATE = col_character(),
##   PARTY = col_character(),
##   VOTES = col_double()
## )
## Parsed with column specification:
## cols(
##   ELECTION_DATE = col_character(),
##   ELECTION_NAME = col_character(),
##   CONTEST_ID = col_double(),
##   CONTEST_NAME = col_character(),
##   PRECINCT_NUMBER = col_double(),
##   WARD = col_double(),
##   CANDIDATE = col_character(),
##   PARTY = col_character(),
##   VOTES = col_double()
## )
## Parsed with column specification:
## cols(
##   ELECTION_DATE = col_character(),
##   ELECTION_NAME = col_character(),
##   CONTEST_ID = col_double(),
##   CONTEST_NAME = col_character(),
##   PRECINCT_NUMBER = col_double(),
##   WARD = col_double(),
##   CANDIDATE = col_character(),
##   PARTY = col_character(),
##   VOTES = col_double()
## )
## Parsed with column specification:
## cols(
##   ELECTION_DATE = col_character(),
##   ELECTION_NAME = col_character(),
##   CONTEST_ID = col_double(),
##   CONTEST_NAME = col_character(),
##   PRECINCT_NUMBER = col_double(),
##   WARD = col_double(),
##   CANDIDATE = col_character(),
##   PARTY = col_character(),
##   VOTES = col_double()
## )
## Parsed with column specification:
## cols(
##   ELECTION_DATE = col_character(),
##   ELECTION_NAME = col_character(),
##   CONTEST_ID = col_double(),
##   CONTEST_NAME = col_character(),
##   PRECINCT_NUMBER = col_double(),
##   WARD = col_double(),
##   CANDIDATE = col_character(),
##   PARTY = col_character(),
##   VOTES = col_double()
## )
## Parsed with column specification:
## cols(
##   ELECTION_DATE = col_character(),
##   ELECTION_NAME = col_character(),
##   CONTEST_ID = col_double(),
##   CONTEST_NAME = col_character(),
##   PRECINCT_NUMBER = col_double(),
##   WARD = col_double(),
##   CANDIDATE = col_character(),
##   PARTY = col_character(),
##   VOTES = col_double()
## )
## Parsed with column specification:
## cols(
##   ELECTION_DATE = col_character(),
##   ELECTION_NAME = col_character(),
##   CONTEST_ID = col_double(),
##   CONTEST_NAME = col_character(),
##   PRECINCT_NUMBER = col_double(),
##   WARD = col_double(),
##   CANDIDATE = col_character(),
##   PARTY = col_character(),
##   VOTES = col_double()
## )
## Parsed with column specification:
## cols(
##   ELECTION_DATE = col_character(),
##   ELECTION_NAME = col_character(),
##   CONTEST_ID = col_double(),
##   CONTEST_NAME = col_character(),
##   PRECINCT_NUMBER = col_double(),
##   WARD = col_double(),
##   CANDIDATE = col_character(),
##   PARTY = col_character(),
##   VOTES = col_double()
## )
## Parsed with column specification:
## cols(
##   ELECTION_DATE = col_character(),
##   ELECTION_NAME = col_character(),
##   CONTEST_ID = col_double(),
##   CONTEST_NAME = col_character(),
##   PRECINCT_NUMBER = col_double(),
##   WARD = col_double(),
##   CANDIDATE = col_character(),
##   PARTY = col_character(),
##   VOTES = col_double()
## )
## Parsed with column specification:
## cols(
##   ElectionDate = col_character(),
##   ElectionName = col_character(),
##   ContestNumber = col_double(),
##   ContestName = col_character(),
##   PrecinctNumber = col_double(),
##   WardNumber = col_double(),
##   Candidate = col_character(),
##   Party = col_character(),
##   Votes = col_double()
## )
## Parsed with column specification:
## cols(
##   ElectionDate = col_character(),
##   ElectionName = col_character(),
##   ContestNumber = col_double(),
##   ContestName = col_character(),
##   PrecinctNumber = col_double(),
##   WardNumber = col_double(),
##   Candidate = col_character(),
##   Party = col_character(),
##   Votes = col_double()
## )
## Parsed with column specification:
## cols(
##   ElectionDate = col_character(),
##   ElectionName = col_character(),
##   ContestNumber = col_double(),
##   ContestName = col_character(),
##   PrecinctNumber = col_double(),
##   WardNumber = col_double(),
##   Candidate = col_character(),
##   Party = col_character(),
##   Votes = col_double()
## )
## Joining, by = c("ElectionDate", "ElectionName", "ContestNumber", "ContestName", "PrecinctNumber", "WardNumber", "Candidate", "Party", "Votes")
## Joining, by = c("ElectionDate", "ElectionName", "ContestNumber", "ContestName", "PrecinctNumber", "WardNumber", "Candidate", "Party", "Votes")

EDA

library(DataExplorer)
str(DF2011to2018)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 105671 obs. of  9 variables:
##  $ ElectionDate  : chr  "11/6/2018 12:00:00 AM" "11/6/2018 12:00:00 AM" "11/6/2018 12:00:00 AM" "11/6/2018 12:00:00 AM" ...
##  $ ElectionName  : chr  "General Election" "General Election" "General Election" "General Election" ...
##  $ ContestNumber : num  -2 -1 301 301 301 301 301 301 301 301 ...
##  $ ContestName   : chr  "REGISTERED VOTERS - TOTAL" "BALLOTS CAST - TOTAL" "DELEGATE TO THE HOUSE OF REPRESENTATIVES" "DELEGATE TO THE HOUSE OF REPRESENTATIVES" ...
##  $ PrecinctNumber: num  1 1 1 1 1 1 1 1 1 1 ...
##  $ WardNumber    : num  6 6 6 6 6 6 6 6 6 6 ...
##  $ Candidate     : chr  NA NA "Bruce Majors" "Natale \"Lino\" Stracuzzi" ...
##  $ Party         : chr  "CITYWIDE" "CITYWIDE" "LIB" "STG" ...
##  $ Votes         : num  6736 2813 40 96 2390 ...
plot_intro(DF2011to2018)

plot_missing(DF2011to2018)

plot_bar(DF2011to2018)
## 2 columns ignored with more than 50 categories.
## ContestName: 749 categories
## Candidate: 1688 categories

plot_qq(DF2011to2018)

#we need to clean ElectionName so it works better
#Election name to Factor
#Character to Date
# Party to Factor
#Party to Factor
#Precinct and ward to factor, contest to continuous
COPY <- DF2011to2018
COPY$ElectionName <- str_replace(COPY$ElectionName, "D.C. General Election", "General Election")
COPY$ElectionName <- str_replace(COPY$ElectionName, "D.C. Generation Election", "General Election")
COPY$ElectionName <- str_replace(COPY$ElectionName, "D.C. Special Election", "Special Election")
COPY$ElectionName <- str_replace(COPY$ElectionName, "D.C. Primary Election", "Primary Election")
plot_bar(COPY)
## 2 columns ignored with more than 50 categories.
## ContestName: 749 categories
## Candidate: 1688 categories

library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
COPY$ElectionDate <- str_replace(COPY$ElectionDate, "6/19/2018 12:00:00 AM", "6/19/2018")
COPY$ElectionDate <- str_replace(COPY$ElectionDate, "11/6/2018 12:00:00 AM", "11/6/2018")
COPY$ElectionDate <- str_replace(COPY$ElectionDate, "12/4/2018 12:00:00 AM", "12/4/2018")
COPY$ElectionDate <- as.Date(COPY$ElectionDate, format = "%m/%d/%Y")
COPY$ContestName <- factor(COPY$ContestName)
COPY$WardNumber <- factor(COPY$WardNumber)
COPY$PrecinctNumber <- factor(COPY$PrecinctNumber)
str(COPY)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 105671 obs. of  9 variables:
##  $ ElectionDate  : Date, format: "2018-11-06" "2018-11-06" ...
##  $ ElectionName  : chr  "General Election" "General Election" "General Election" "General Election" ...
##  $ ContestNumber : num  -2 -1 301 301 301 301 301 301 301 301 ...
##  $ ContestName   : Factor w/ 749 levels "ADVISORY NEIGHBORHOOD COMMISSIONER 1A01",..: 725 617 656 656 656 656 656 656 656 656 ...
##  $ PrecinctNumber: Factor w/ 143 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ WardNumber    : Factor w/ 8 levels "1","2","3","4",..: 6 6 6 6 6 6 6 6 6 6 ...
##  $ Candidate     : chr  NA NA "Bruce Majors" "Natale \"Lino\" Stracuzzi" ...
##  $ Party         : chr  "CITYWIDE" "CITYWIDE" "LIB" "STG" ...
##  $ Votes         : num  6736 2813 40 96 2390 ...
# extract totals into a total data frame
unique(COPY$ElectionDate)
##  [1] "2018-11-06" "2018-06-19" "2018-12-04" "2011-04-26" "2012-04-03"
##  [6] "2012-05-15" "2012-11-06" "2013-04-23" "2014-04-01" "2014-07-15"
## [11] "2014-11-04" "2015-04-28" "2016-06-14" "2016-11-08"
COPY2 <- COPY
totals <- COPY2 %>%
  filter(str_detect(ContestName, "REGISTERED")) %>%
  mutate(totals = Votes) %>%
  select(-Votes)

COPY2$ContestName <- tolower(COPY2$ContestName)
removal <- str_detect(COPY2$ContestName, "registered")
COPY2 <- COPY2[-removal, ]

copy3 <- COPY2 %>%
  full_join(totals[, c("ElectionDate", "PrecinctNumber", "WardNumber", "totals")],  by = c("ElectionDate", "PrecinctNumber", "WardNumber"))
library(readxl)
datelist <- read_excel("datelist.xlsx")
## New names:
## * `` -> ...1
str(datelist)
## Classes 'tbl_df', 'tbl' and 'data.frame':    14 obs. of  11 variables:
##  $ ...1         : num  1 2 3 4 5 6 7 8 9 10 ...
##  $ Date         : POSIXct, format: "2018-11-06" "2018-06-19" ...
##  $ MaximumTemp  : num  60 90 43 84 71 81 45 62 62 88 ...
##  $ MinimumTemp  : num  51 70 29 68 32 65 28 46 38 71 ...
##  $ AvgTemp      : num  55.5 80 36 76 51.5 73 36.5 54 50 79.5 ...
##  $ Departure    : num  5.9 6.3 -4 19 2 10.4 -13.1 -2.1 1.2 2.2 ...
##  $ HDD          : num  9 0 29 0 13 0 28 11 15 0 ...
##  $ CDD          : num  0 15 0 11 0 8 0 0 0 15 ...
##  $ Precipitation: num  0.92 0.01 0 0 0 0.39 0 0 0 0.84 ...
##  $ New Snow     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Snow Depth   : num  0 0 0 0 0 0 0 0 0 0 ...
COPY2$ElectionDate <- as.POSIXct(COPY2$ElectionDate)
COPY2 %>%
  left_join(datelist,  by = c("ElectionDate" = "Date")) %>%
  ggplot(aes(x = ElectionDate, y = Votes, fill = AvgTemp)) + 
  geom_bar(stat = "identity")

totals2 <- totals %>%
  group_by(WardNumber, ElectionDate) %>%
  summarize(totalsum = sum(totals))
COPY2 %>%
  filter(str_detect(ContestName, "total")) %>%
  group_by(WardNumber, ElectionDate) %>%
  summarize(Votesum = sum(Votes)) %>%
  ggplot(aes(x = WardNumber, y = Votesum)) + 
  geom_bar(stat = "identity", fill = "green") + 
  geom_bar(data = totals2, aes(x = WardNumber, y = totalsum), fill = "red", alpha = 0.8, stat = "identity", inherit.aes = FALSE) 

library(ggthemes)
COPY2 %>%
  group_by(WardNumber) %>%
  summarize(Votesum = sum(Votes)) %>%
  ggplot(aes(x = WardNumber, y = Votesum)) +
  geom_bar(stat = "identity") + 
  theme_stata() + 
  xlab("Ward") + 
  ylab("Sum of Votes") +
  ggtitle("Votes per Ward", subtitle = "2011 to 2018")

import_crime <- function(){
Crime_Incidents_in_2013 <- read_csv("drive-download-20190330T183559Z-001/Crime_Incidents_in_2013.csv")
Crime_Incidents_in_2014 <- read_csv("drive-download-20190330T183559Z-001/Crime_Incidents_in_2014.csv")
Crime_Incidents_in_2015 <- read_csv("drive-download-20190330T183559Z-001/Crime_Incidents_in_2015.csv")
Crime_Incidents_in_2016 <- read_csv("drive-download-20190330T183559Z-001/Crime_Incidents_in_2016.csv")
Crime_Incidents_in_2017 <- read_csv("drive-download-20190330T183559Z-001/Crime_Incidents_in_2017.csv")
Crime_Incidents_in_2018 <- read_csv("drive-download-20190330T183559Z-001/Crime_Incidents_in_2018.csv")
crime_data <- list(Crime_Incidents_in_2013, Crime_Incidents_in_2014, Crime_Incidents_in_2015, Crime_Incidents_in_2016, Crime_Incidents_in_2017, Crime_Incidents_in_2018) %>% reduce(rbind)
return(crime_data)
}
crime_data <- import_crime()
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   X = col_double(),
##   Y = col_double(),
##   REPORT_DAT = col_datetime(format = ""),
##   XBLOCK = col_double(),
##   YBLOCK = col_double(),
##   WARD = col_double(),
##   DISTRICT = col_double(),
##   PSA = col_double(),
##   LATITUDE = col_double(),
##   LONGITUDE = col_double(),
##   START_DATE = col_datetime(format = ""),
##   END_DATE = col_datetime(format = ""),
##   OBJECTID = col_double()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   X = col_double(),
##   Y = col_double(),
##   REPORT_DAT = col_datetime(format = ""),
##   XBLOCK = col_double(),
##   YBLOCK = col_double(),
##   WARD = col_double(),
##   DISTRICT = col_double(),
##   PSA = col_double(),
##   LATITUDE = col_double(),
##   LONGITUDE = col_double(),
##   START_DATE = col_datetime(format = ""),
##   END_DATE = col_datetime(format = ""),
##   OBJECTID = col_double()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   X = col_double(),
##   Y = col_double(),
##   REPORT_DAT = col_datetime(format = ""),
##   XBLOCK = col_double(),
##   YBLOCK = col_double(),
##   WARD = col_double(),
##   DISTRICT = col_double(),
##   PSA = col_double(),
##   LATITUDE = col_double(),
##   LONGITUDE = col_double(),
##   START_DATE = col_datetime(format = ""),
##   END_DATE = col_datetime(format = ""),
##   OBJECTID = col_double()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   X = col_double(),
##   Y = col_double(),
##   REPORT_DAT = col_datetime(format = ""),
##   XBLOCK = col_double(),
##   YBLOCK = col_double(),
##   WARD = col_double(),
##   DISTRICT = col_double(),
##   PSA = col_double(),
##   LATITUDE = col_double(),
##   LONGITUDE = col_double(),
##   START_DATE = col_datetime(format = ""),
##   END_DATE = col_datetime(format = ""),
##   OBJECTID = col_double()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   X = col_double(),
##   Y = col_double(),
##   REPORT_DAT = col_datetime(format = ""),
##   XBLOCK = col_double(),
##   YBLOCK = col_double(),
##   WARD = col_double(),
##   DISTRICT = col_double(),
##   PSA = col_double(),
##   LATITUDE = col_double(),
##   LONGITUDE = col_double(),
##   START_DATE = col_datetime(format = ""),
##   END_DATE = col_datetime(format = ""),
##   OBJECTID = col_double()
## )
## See spec(...) for full column specifications.
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   X = col_double(),
##   Y = col_double(),
##   CCN = col_double(),
##   REPORT_DAT = col_datetime(format = ""),
##   XBLOCK = col_double(),
##   YBLOCK = col_double(),
##   WARD = col_double(),
##   DISTRICT = col_double(),
##   PSA = col_double(),
##   LATITUDE = col_double(),
##   LONGITUDE = col_double(),
##   START_DATE = col_datetime(format = ""),
##   END_DATE = col_datetime(format = ""),
##   OBJECTID = col_double()
## )
## See spec(...) for full column specifications.
crime_sum <- crime_data %>%
  group_by(WARD) %>%
  summarize(n = n()) %>%
  na.omit()
crime_data %>%
  group_by(OFFENSE) %>%
  summarise()
crime_data %>%
  filter(OFFENSE == "HOMICIDE") %>%
  group_by(WARD) %>%
  summarize(crimenumber = n()) %>%
  ggplot(aes(x = factor(WARD), y = crimenumber)) + 
  geom_bar(stat = "identity") +
  xlab("Ward") +
  theme_stata() +
  ylab("Frequency of Homicide") +
  ggtitle("Homicide frequency per ward", subtitle = "2013 to 2018")

Spatial Visualizations

library(ggmap)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
DCMAP <- get_map("Washington, DC", zoom = 12)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=Washington,%20DC&zoom=12&size=640x640&scale=2&maptype=terrain&language=en-EN&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Washington,+DC&key=xxx
set.seed(20)
clusters <- kmeans(crime_data[ ,1:2], 8)
crime_data$ESTCLUSTER <- as.factor(clusters$cluster)
ggmap(DCMAP) + geom_point(data = crime_data, aes(x = X, y = Y, col = factor(ESTCLUSTER)), alpha = 0.4) + 
  ggtitle("DC Wards using Kmeans on crime data")
## Warning: Removed 4582 rows containing missing values (geom_point).

ggmap(DCMAP) + geom_point(data = crime_data, aes(x = X, y = Y, col = factor(WARD)), alpha = 0.4) 
## Warning: Removed 4582 rows containing missing values (geom_point).

ggmap(DCMAP) + 
  geom_bin2d(data = crime_data %>% filter(OFFENSE == 'HOMICIDE'), aes(x = X, y = Y), alpha  = 0.8)
## Warning: Removed 50 rows containing non-finite values (stat_bin2d).

library(tidyverse)
library(maptools)
## Loading required package: sp
## Checking rgeos availability: TRUE
library(sp)
library(mapdata)
## Loading required package: maps
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
library(rgl)
library(rgdal)
## rgdal: version: 1.4-3, (SVN revision 828)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
##  Path to GDAL shared files: C:/Users/bingo/Documents/R/win-library/3.5/rgdal/gdal
##  GDAL binary built with GEOS: TRUE 
##  Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
##  Path to PROJ.4 shared files: C:/Users/bingo/Documents/R/win-library/3.5/rgdal/proj
##  Linking to sp version: 1.3-1
library(ggplot2)
library(rgeos)
## rgeos version: 0.4-2, (SVN revision 581)
##  GEOS runtime version: 3.6.1-CAPI-1.10.1 
##  Linking to sp version: 1.3-1 
##  Polygon checking: TRUE
## 
## Attaching package: 'rgeos'
## The following object is masked from 'package:rgl':
## 
##     triangulate
precinct <- readOGR("C:/Users/bingo/Desktop/CUA HaXS/Voting_Precinct__2012/Voting_Precinct__2012.shp") 
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\bingo\Desktop\CUA HaXS\Voting_Precinct__2012\Voting_Precinct__2012.shp", layer: "Voting_Precinct__2012"
## with 143 features
## It has 6 fields
## Integer64 fields read as strings:  OBJECTID_1 OBJECTID
ward <- readOGR("C:/Users/bingo/Desktop/CUA HaXS/Ward_from_2012/Ward_from_2012.shp")
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\bingo\Desktop\CUA HaXS\Ward_from_2012\Ward_from_2012.shp", layer: "Ward_from_2012"
## with 8 features
## It has 82 fields
## Integer64 fields read as strings:  OBJECTID WARD POP_2000 POP_2010 POP_2011_2
precinct2 <- fortify(precinct)
## Regions defined for each Polygons
ward2 <- fortify(ward)
## Regions defined for each Polygons
precinctdat <- as.data.frame(precinct)
map1 <- ggplot() + 
  geom_polygon(data = precinct2, aes(long, lat, group = group), 
               colour = "black", fill = "white") +
  geom_polygon(data = ward2, aes(long, lat, group = group, fill = group),
               colour = "black", alpha = 0.3, size = 1) +
  ggtitle("DC Precincts and Wards",
          subtitle = "Est. 2012") +
  coord_quickmap()+
  theme_minimal()
map1

#opens up possibility of DC chloropleth map (interactive too)
library(ggmap)
library(sf)
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
DCMAP <- get_map("Washington, DC", zoom = 12)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=Washington,%20DC&zoom=12&size=640x640&scale=2&maptype=terrain&language=en-EN&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=Washington,+DC&key=xxx
wardsp <- spTransform(ward, CRS("+proj=longlat +datum=WGS84"))
wardsp2 <- fortify(wardsp)
## Regions defined for each Polygons
#Add another column for ward
WARDWITHLABELS <- ward2 %>%
  mutate(WARD = 
           case_when(
             group == 0.1 ~ 8,
             group == 1.1 ~ 6,
             group == 2.1 ~ 7,
             group == 3.1 ~ 2,
             group == 4.1 ~ 1, 
             group == 5.1 ~ 5,
             group == 6.1 ~ 3,
             group == 7.1 ~ 4
           ))

# Get centroids to plot labels
ward_Cent <- gCentroid(ward, byid = TRUE)
ward_CentPoints <- as.data.frame(ward_Cent)
Wardlabs <- ward_CentPoints %>%
  cbind(c(8, 6, 7, 2, 1, 5, 3, 4))
colnames(Wardlabs) <- c("x", "y", "WARD")
# Plot the points on the wards
ggplot() +
  geom_polygon(data = WARDWITHLABELS, aes(long, lat, group = group, fill = factor(WARD)),
               colour = "black", alpha = 0.3, size = 1) +
  geom_text(data = Wardlabs, aes(x, y, label = WARD), size = 5) +
  geom_point(data = crime_data %>% filter(OFFENSE == "HOMICIDE"), aes(x = X, y = Y), alpha = 0.4) +
  ggtitle("DC Wards",
          subtitle = "Est. 2012") +
  theme(legend.position = "none") +
  coord_quickmap()

#experiment with google maps
ggmap(DCMAP) + 
  geom_polygon(data = WARDWITHLABELS, aes(long, lat, group = group, fill = factor(WARD)),
               colour = "black", alpha = 0.3, size = 1)

library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggmap':
## 
##     wind
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
Sys.setenv('MAPBOX_TOKEN' = 'pk.eyJ1IjoibWFzdGVyYmluZ28xIiwiYSI6ImNqdDluOHo2aDAxenQ0OW51dmdkOGIyaDkifQ.KMv1Wkds1VtmtzOOmMWuiw')
plot_mapbox(mode = 'scattermapbox') %>%
  add_polygons(data = WARDWITHLABELS , 
            x = ~ long, y = ~ lat, split = ~factor(WARD), 
            text = ~WARD, hoverinfo = "none", alpha = 0.3) %>%
  add_markers(data = crime_data %>% filter(OFFENSE == "HOMICIDE"), x = ~X, y = ~Y, alpha = 1, color = "blue") %>%
  layout(mapbox = list(zoom = 10.5,
                       center = list(lat = ~median(WARDWITHLABELS$lat),
                                     lon = ~median(WARDWITHLABELS$long))
  ))
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels

Attempting to make a small web app out of the crime data with a explorer.

library(crosstalk)
dooot <- SharedData$new(crime_data %>% filter(OFFENSE == "HOMICIDE") %>%
                          select(X, Y, METHOD, WARD))
bscols(
  plot_mapbox() %>%
    add_markers(data = dooot, x = ~X, y = ~Y, text = ~WARD, hoverinfo = "text") %>%
    highlight(on = "plotly_selected", dynamic = TRUE) %>%
    layout(mapbox = list(zoom = 10.5,
                       center = list(lat = ~median(WARDWITHLABELS$lat),
                                     lon = ~median(WARDWITHLABELS$long)))), 
  DT::datatable(dooot)
)
## Adding more colors to the selection color palette.
## Setting the `off` event (i.e., 'plotly_deselect') to match the `on` event (i.e., 'plotly_selected'). You can change this default via the `highlight()` function.